Sections

  1. Our Libraries & Data.
  2. Exploratory Data Analysis
  3. Models and Residual Analysis
  4. Choose Best Model(s)
  5. Forecast
  6. Conclusion

Our Libraries and Data

library(fpp)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1
## 
## 
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(forecast)

cpi_it <- read.csv("/Users/mattcolao/Downloads/CPI_feb_nonseasonal.csv")
cpi_ts <- ts(cpi_it[,2], start=c(1988,12), frequency = 12)

sum(is.na(cpi_it))
## [1] 0
anyDuplicated(cpi_it)
## [1] 0
boxplot(cpi_ts)

summary(cpi_ts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.721   7.906  10.900  28.555  46.800 100.000

Exploratory Data Analysis

# Plot
plot(cpi_ts, 
     main = "Consumer Price Index (CPI) Over Time", 
     xlab = "Year", 
     ylab = "CPI", 
     col = "steelblue", 
     lwd = 2)

# Acf
Acf(cpi_ts, 
    lag.max = 200, 
    main = "Autocorrelation Function of CPI Time Series")

# Decomposition Plot
decomp_cpi <- stl(cpi_ts, s.window ='periodic')
plot(decomp_cpi, 
     main = "STL Decomposition of CPI Time Series")

# Additive or Seasonal
a_o_m <- decompose(cpi_ts)
a_o_m$type
## [1] "additive"
# Seasonally Adjust
seas_cpi_decomp <- seasadj(decomp_cpi)
plot(seas_cpi_decomp,
     main = "Seasonally Adjusted CPI vs Original CPI",
     ylab = "CPI",
     xlab = "Year",
     col = "blue",
     lwd = 2)
lines(cpi_ts, col = "red", lwd = 1.5, lty = 2)
legend("topright",
       legend = c("Seasonally Adjusted", "Original"),
       col = c("blue", "red"),
       lty = c(1, 2),
       lwd = c(2, 1.5),
       bty = "n")

cpi_ts <- window(cpi_ts, start = c(2009, 1))

Models and Residual Analysis

# Naive
naive_cpi <- naive(cpi_ts)
res_naive_cpi <- residuals(naive_cpi)
plot(res_naive_cpi,
     main = "Residuals of Naive Model for CPI",
     ylab = "Residuals",
     xlab = "Time",
     col = "darkgray",
     lwd = 1.5)
abline(h = 0, col = "red", lwd = 2, lty = 2)
grid() 

fitted_naive <- as.numeric(fitted(naive_cpi))
residuals_naive <- as.numeric(residuals(naive_cpi))

# Plot residuals vs fitted values
df_naive <- na.omit(data.frame(
  fitted = as.numeric(fitted(naive_cpi)),
  resid  = as.numeric(residuals(naive_cpi))
))
plot(df_naive$fitted, df_naive$resid,
     main = "Residuals vs Fitted (Naive)", xlab = "Fitted", ylab = "Residuals",
     pch = 16)

checkresiduals(naive_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from Naive method
## Q* = 59.495, df = 24, p-value = 7.524e-05
## 
## Model df: 0.   Total lags used: 24
shapiro.test(residuals(naive_cpi))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(naive_cpi)
## W = 0.99236, p-value = 0.4095
# Simple Moving Averages
ma3_cpi <- ma(cpi_ts, order = 3)
ma6_cpi <- ma(cpi_ts, order = 6)
ma9_cpi <- ma(cpi_ts, order = 9)
plot(cpi_ts, 
     main = "CPI with Simple Moving Averages (3, 6, 9)", 
     ylab = "CPI", 
     xlab = "Time", 
     col = "black", 
     lwd = 2)
lines(ma3_cpi, col = "red", lwd = 2)
lines(ma6_cpi, col = "blue", lwd = 2)
lines(ma9_cpi, col = "darkgreen", lwd = 2)
legend("topright", 
       legend = c("Original CPI", "MA(3)", "MA(6)", "MA(9)"),
       col = c("black", "red", "blue", "darkgreen"),
       lty = 1, 
       lwd = 2,
       bty = "n")
grid()

ma3_forecast_cpi <- forecast(na.omit(ma3_cpi), h=5)
ma3_trailing <- stats::filter(cpi_ts, rep(1/3, 3), sides = 1)

# Forecast from trailing MA (omit initial NAs)
ma3_forecast_cpi <- forecast(na.omit(ma3_trailing), h = 5)

# Plot forecast
autoplot(ma3_forecast_cpi) +
  labs(title = "Forecast from Trailing MA(3)",
       x = "Time", y = "CPI") +
  theme_minimal()

res_ma3 <- na.omit(cpi_ts - ma3_cpi)
res_ma6 <- na.omit(cpi_ts - ma6_cpi)
res_ma9 <- na.omit(cpi_ts - ma9_cpi)

rmse_ma3 <- sqrt(mean(res_ma3^2))
rmse_ma6 <- sqrt(mean(res_ma6^2))
rmse_ma9 <- sqrt(mean(res_ma9^2))

cat("RMSE:\nMA(3):", rmse_ma3, "\nMA(6):", rmse_ma6, "\nMA(9):", rmse_ma9, "\n")
## RMSE:
## MA(3): 0.01522434 
## MA(6): 0.02831507 
## MA(9): 0.03647768
library(ggplot2)
par(mfrow = c(3, 1), mar = c(4, 4, 2, 1))
plot(res_ma3,
     main = "Residuals from MA(3) Model",
     ylab = "Residuals",
     xlab = "Time",
     col = "darkgray",
     lwd = 1.5)
abline(h = 0, col = "red", lty = 2)
grid()

Acf(res_ma3,
    main = "ACF of MA(3) Residuals")
hist(res_ma3,
     main = "Histogram of MA(3) Residuals",
     xlab = "Residuals",
     col = "lightblue",
     border = "white")
abline(v = mean(res_ma3), col = "red", lwd = 2, lty = 2)

par(mfrow = c(1, 1))
shapiro.test(res_ma3)
## 
##  Shapiro-Wilk normality test
## 
## data:  res_ma3
## W = 0.99492, p-value = 0.7637
# Exponential Smoothing
ets_cpi <- ets(cpi_ts)
ets_forecast_cpi <- forecast(ets_cpi, h =5)
plot(ets_cpi)

coef(ets_cpi)
##       alpha        beta           l           b 
##  0.99977499  0.02355652  9.94295563 -0.02473033
checkresiduals(ets_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,A,N)
## Q* = 62.613, df = 24, p-value = 2.707e-05
## 
## Model df: 0.   Total lags used: 24
sqrt(ets_cpi$sigma2)
## [1] 0.03891258
fitted_vals <- as.numeric(fitted(ets_cpi))
residual_vals <- as.numeric(residuals(ets_cpi))

# Plot
df_ets <- na.omit(data.frame(
  fitted = as.numeric(fitted(ets_cpi)),
  resid  = as.numeric(residuals(ets_cpi))
))
plot(df_ets$fitted, df_ets$resid,
     main = "Residuals vs Fitted (ETS)", xlab = "Fitted", ylab = "Residuals",
     pch = 16)

# Holt-Winters
hw_cpi <- HoltWinters(cpi_ts)
hw_forecast_cpi <- forecast(hw_cpi, h = 5)
hw_cpi$alpha
##     alpha 
## 0.7301448
hw_cpi$beta
##        beta 
## 0.009435855
hw_cpi$gamma
## gamma 
##     1
hw_cpi$coefficients
##            a            b           s1           s2           s3           s4 
##  6.781204781 -0.014523423 -0.005996446  0.017243926  0.026524345  0.060220445 
##           s5           s6           s7           s8           s9          s10 
##  0.079560875  0.073448158  0.041040908 -0.006688325 -0.053080070 -0.034382941 
##          s11          s12 
##  0.011064098  0.022795219
sqrt(hw_cpi$SSE/length(cpi_ts))
## [1] 0.04664606
rmse <- sqrt(hw_cpi$SSE / length(cpi_ts))
hw_summary <- rbind(
  Alpha               = round(hw_cpi$alpha, 4),
  Beta                = round(hw_cpi$beta, 4),
  Gamma               = round(hw_cpi$gamma, 4),
  Level_a             = round(hw_cpi$coefficients["a"], 4),
  Trend_b             = round(hw_cpi$coefficients["b"], 4),
  Seasonality_s1      = round(hw_cpi$coefficients["s1"], 4),  # Add more s2, s3 if seasonal
  RMSE                = round(rmse, 4)
)
print(hw_summary)
##                  alpha
## Alpha           0.7301
## Beta            0.0094
## Gamma           1.0000
## Level_a         6.7812
## Trend_b        -0.0145
## Seasonality_s1 -0.0060
## RMSE            0.0466
fitted_vals <- as.numeric(fitted(hw_cpi))
residual_vals <- as.numeric(residuals(hw_cpi))

# Plot
df_hw <- na.omit(data.frame(
  fitted = as.numeric(fitted(hw_cpi)),
  resid  = as.numeric(residuals(hw_cpi))
))
plot(df_hw$fitted, df_hw$resid,
     main = "Residuals vs Fitted (HW)", xlab = "Fitted", ylab = "Residuals",
     pch = 16)

# Fit the HoltWinters model
hw_cpi <- HoltWinters(cpi_ts)

# Extract time points corresponding to the fitted values.
# Note: HoltWinters only provides fitted values for a subset of the data.
fitted_time <- time(hw_cpi$fitted)

# Extract the actual values for the period covered by the fitted values.
actual_values <- window(cpi_ts, start = fitted_time[1])

# Extract fitted values and residuals from the model object.
fitted_values <- hw_cpi$fitted[, "xhat"]
residuals_hw <- residuals(hw_cpi)

# Create a data frame with the required columns.
df <- data.frame(
  time = fitted_time,
  actual = as.numeric(actual_values),
  fitted = as.numeric(fitted_values),
  residuals = as.numeric(residuals_hw)
)

# Load required libraries for plotting.
library(ggplot2)
library(gridExtra)

# Plot 1: Fitted Values vs Residuals
p1 <- ggplot(df, aes(x = fitted, y = residuals)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(title = "Fitted vs Residuals", x = "Fitted Values", y = "Residuals") +
  theme_minimal()

# Plot 2: Actual Values vs Residuals
p2 <- ggplot(df, aes(x = actual, y = residuals)) +
  geom_point(color = "darkgreen", alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(title = "Residuals vs Actual Values", x = "Actual Values", y = "Residuals") +
  theme_minimal()

# Combine the two plots side by side.
grid.arrange(p1, p2, ncol = 2)

checkresiduals(hw_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from HoltWinters
## Q* = 171.22, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24
# ARIMA
nsdiffs(cpi_ts)
## [1] 0
ndiffs(cpi_ts)
## [1] 2
# Acf, Pacf, and plot with differencing of 2
cpi_ts_diff2 <- diff(cpi_ts, differences = 2)

par(mfrow = c(3, 1), mar = c(4, 4, 2, 1))
plot(cpi_ts_diff2,
     main = "Second-Order Differenced CPI Time Series",
     ylab = "Differenced CPI",
     xlab = "Time",
     col = "steelblue",
     lwd = 2)
grid()
Acf(cpi_ts_diff2,
    main = "ACF of Differenced CPI")
Pacf(cpi_ts_diff2,
     main = "PACF of Differenced CPI")

par(mfrow = c(1, 1))


arima_cpi <- auto.arima(cpi_ts, trace = TRUE, stepwise = FALSE, seasonal = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(0,2,0)                               : -625.8805
##  ARIMA(0,2,0)(0,0,1)[12]                    : -629.2483
##  ARIMA(0,2,0)(0,0,2)[12]                    : -627.2869
##  ARIMA(0,2,0)(1,0,0)[12]                    : -646.6527
##  ARIMA(0,2,0)(1,0,1)[12]                    : -645.2021
##  ARIMA(0,2,0)(1,0,2)[12]                    : -643.1427
##  ARIMA(0,2,0)(2,0,0)[12]                    : -642.5866
##  ARIMA(0,2,0)(2,0,1)[12]                    : -643.0102
##  ARIMA(0,2,0)(2,0,2)[12]                    : -642.7085
##  ARIMA(0,2,1)                               : -677.1693
##  ARIMA(0,2,1)(0,0,1)[12]                    : -684.4805
##  ARIMA(0,2,1)(0,0,2)[12]                    : -684.9385
##  ARIMA(0,2,1)(1,0,0)[12]                    : -690.1083
##  ARIMA(0,2,1)(1,0,1)[12]                    : -693.316
##  ARIMA(0,2,1)(1,0,2)[12]                    : -691.5278
##  ARIMA(0,2,1)(2,0,0)[12]                    : Inf
##  ARIMA(0,2,1)(2,0,1)[12]                    : Inf
##  ARIMA(0,2,1)(2,0,2)[12]                    : Inf
##  ARIMA(0,2,2)                               : -695.5569
##  ARIMA(0,2,2)(0,0,1)[12]                    : -701.5944
##  ARIMA(0,2,2)(0,0,2)[12]                    : -700.8482
##  ARIMA(0,2,2)(1,0,0)[12]                    : -701.1167
##  ARIMA(0,2,2)(1,0,1)[12]                    : -701.1537
##  ARIMA(0,2,2)(1,0,2)[12]                    : -699.0239
##  ARIMA(0,2,2)(2,0,0)[12]                    : Inf
##  ARIMA(0,2,2)(2,0,1)[12]                    : Inf
##  ARIMA(0,2,3)                               : -693.6185
##  ARIMA(0,2,3)(0,0,1)[12]                    : -699.5589
##  ARIMA(0,2,3)(0,0,2)[12]                    : -698.7392
##  ARIMA(0,2,3)(1,0,0)[12]                    : -699.4251
##  ARIMA(0,2,3)(1,0,1)[12]                    : -699.7714
##  ARIMA(0,2,3)(2,0,0)[12]                    : Inf
##  ARIMA(0,2,4)                               : -691.5257
##  ARIMA(0,2,4)(0,0,1)[12]                    : -697.5025
##  ARIMA(0,2,4)(1,0,0)[12]                    : -697.7604
##  ARIMA(0,2,5)                               : -689.8925
##  ARIMA(1,2,0)                               : -641.246
##  ARIMA(1,2,0)(0,0,1)[12]                    : -645.3874
##  ARIMA(1,2,0)(0,0,2)[12]                    : -643.5887
##  ARIMA(1,2,0)(1,0,0)[12]                    : -661.6258
##  ARIMA(1,2,0)(1,0,1)[12]                    : -660.1144
##  ARIMA(1,2,0)(1,0,2)[12]                    : -658.0065
##  ARIMA(1,2,0)(2,0,0)[12]                    : -658.1445
##  ARIMA(1,2,0)(2,0,1)[12]                    : -658.3672
##  ARIMA(1,2,0)(2,0,2)[12]                    : -658.6523
##  ARIMA(1,2,1)                               : -691.3732
##  ARIMA(1,2,1)(0,0,1)[12]                    : -697.0558
##  ARIMA(1,2,1)(0,0,2)[12]                    : -696.3425
##  ARIMA(1,2,1)(1,0,0)[12]                    : Inf
##  ARIMA(1,2,1)(1,0,1)[12]                    : Inf
##  ARIMA(1,2,1)(1,0,2)[12]                    : Inf
##  ARIMA(1,2,1)(2,0,0)[12]                    : Inf
##  ARIMA(1,2,1)(2,0,1)[12]                    : Inf
##  ARIMA(1,2,2)                               : -701.3559
##  ARIMA(1,2,2)(0,0,1)[12]                    : -708.0632
##  ARIMA(1,2,2)(0,0,2)[12]                    : -707.0546
##  ARIMA(1,2,2)(1,0,0)[12]                    : Inf
##  ARIMA(1,2,2)(1,0,1)[12]                    : Inf
##  ARIMA(1,2,2)(2,0,0)[12]                    : Inf
##  ARIMA(1,2,3)                               : -710.8459
##  ARIMA(1,2,3)(0,0,1)[12]                    : -717.063
##  ARIMA(1,2,3)(1,0,0)[12]                    : Inf
##  ARIMA(1,2,4)                               : -708.7293
##  ARIMA(2,2,0)                               : -655.6081
##  ARIMA(2,2,0)(0,0,1)[12]                    : -663.3929
##  ARIMA(2,2,0)(0,0,2)[12]                    : -661.9197
##  ARIMA(2,2,0)(1,0,0)[12]                    : -684.4953
##  ARIMA(2,2,0)(1,0,1)[12]                    : -683.8523
##  ARIMA(2,2,0)(1,0,2)[12]                    : -681.726
##  ARIMA(2,2,0)(2,0,0)[12]                    : -678.8588
##  ARIMA(2,2,0)(2,0,1)[12]                    : -679.2263
##  ARIMA(2,2,1)                               : -682.9602
##  ARIMA(2,2,1)(0,0,1)[12]                    : -689.745
##  ARIMA(2,2,1)(0,0,2)[12]                    : -689.0566
##  ARIMA(2,2,1)(1,0,0)[12]                    : -730.8154
##  ARIMA(2,2,1)(1,0,1)[12]                    : -728.6929
##  ARIMA(2,2,1)(2,0,0)[12]                    : -725.2787
##  ARIMA(2,2,2)                               : Inf
##  ARIMA(2,2,2)(0,0,1)[12]                    : Inf
##  ARIMA(2,2,2)(1,0,0)[12]                    : Inf
##  ARIMA(2,2,3)                               : Inf
##  ARIMA(3,2,0)                               : -667.1238
##  ARIMA(3,2,0)(0,0,1)[12]                    : -674.8113
##  ARIMA(3,2,0)(0,0,2)[12]                    : -672.8657
##  ARIMA(3,2,0)(1,0,0)[12]                    : -688.3437
##  ARIMA(3,2,0)(1,0,1)[12]                    : -687.7024
##  ARIMA(3,2,0)(2,0,0)[12]                    : -683.2593
##  ARIMA(3,2,1)                               : -679.3806
##  ARIMA(3,2,1)(0,0,1)[12]                    : -683.6166
##  ARIMA(3,2,1)(1,0,0)[12]                    : Inf
##  ARIMA(3,2,2)                               : -701.2401
##  ARIMA(4,2,0)                               : -671.3563
##  ARIMA(4,2,0)(0,0,1)[12]                    : -678.8379
##  ARIMA(4,2,0)(1,0,0)[12]                    : -697.3069
##  ARIMA(4,2,1)                               : Inf
##  ARIMA(5,2,0)                               : -676.9983
## 
##  Now re-fitting the best model(s) without approximations...
## 
## 
## 
## 
##  Best model: ARIMA(2,2,1)(1,0,0)[12]
arima_cpi
## Series: cpi_ts 
## ARIMA(2,2,1)(1,0,0)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1
##       0.2920  -0.0969  -0.9852  0.2613
## s.e.  0.0733   0.0727   0.0166  0.0778
## 
## sigma^2 = 0.001309:  log likelihood = 365.21
## AIC=-720.43   AICc=-720.11   BIC=-704.14
fitted_vals_arima <- as.numeric(fitted(arima_cpi))
residual_vals_arima <- as.numeric(residuals(arima_cpi))

plot(fitted_vals_arima, residual_vals_arima,
     main = "Fitted vs Residuals Values (ARIMA Model)",
     xlab = "Fitted Values",
     ylab = "Residuals",
     col = "darkblue",
     pch = 16,
     cex = 1.2)
abline(h = 0, col = "red", lwd = 2, lty = 2)
grid()

checkresiduals(arima_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,2,1)(1,0,0)[12]
## Q* = 14.181, df = 20, p-value = 0.8212
## 
## Model df: 4.   Total lags used: 24
tsdiag(arima_cpi,gof.lag = 10)

shapiro.test(residuals(arima_cpi))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(arima_cpi)
## W = 0.99302, p-value = 0.487

Forecast

forecast_arima <- forecast(arima_cpi, h=5)
autoplot(forecast_arima) +
  labs(
    title = "5-Step Ahead Forecast for ARIMA Model",
    x = "Time",
    y = "CPI"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

forecast_arima
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2025       6.798951 6.752589 6.845313 6.728047 6.869855
## Apr 2025       6.790816 6.714523 6.867110 6.674135 6.907497
## May 2025       6.764973 6.667326 6.862620 6.615635 6.914311
## Jun 2025       6.747866 6.633070 6.862662 6.572301 6.923431
## Jul 2025       6.743269 6.613346 6.873193 6.544569 6.941970
accuracy(forecast_arima)
##                       ME       RMSE        MAE        MPE      MAPE      MASE
## Training set 0.003007843 0.03561107 0.02739415 0.03744374 0.3453403 0.1406255
##                      ACF1
## Training set -0.002039569
model_211_100 <- Arima(cpi_ts, order = c(2,1,1),
                       seasonal = list(order = c(1,0,0), period = 12))
model_221_101 <- Arima(cpi_ts, order = c(2,2,1),
                       seasonal = list(order = c(1,0,1), period = 12))
fc_211_100 <- forecast(model_211_100, h = 5)
fc_221_101 <- forecast(model_221_101, h = 5)

accuracy(fc_211_100)
##                        ME       RMSE        MAE         MPE      MAPE      MASE
## Training set -0.007425151 0.03605797 0.02774145 -0.09024495 0.3489304 0.1424083
##                     ACF1
## Training set -0.04491967
accuracy(fc_221_101)
##                       ME       RMSE        MAE        MPE      MAPE      MASE
## Training set 0.002454401 0.03461615 0.02673601 0.02921089 0.3379801 0.1372469
##                      ACF1
## Training set 0.0001316574
acc_211_100 <- accuracy(fc_211_100)
acc_221_101 <- accuracy(fc_221_101)
acc_model1 <- as.data.frame(t(acc_211_100))
acc_model2 <- as.data.frame(t(acc_221_101))
acc_model1$Metric <- rownames(acc_model1)
acc_model2$Metric <- rownames(acc_model2)
acc_model1 <- acc_model1[, c("Metric", setdiff(names(acc_model1), "Metric"))]
acc_model2 <- acc_model2[, c("Metric", setdiff(names(acc_model2), "Metric"))]
cat("Accuracy measures for ARIMA(2,1,1)(1,0,0)[12]:\n")
## Accuracy measures for ARIMA(2,1,1)(1,0,0)[12]:
print(acc_model1)
##      Metric Training set
## ME       ME -0.007425151
## RMSE   RMSE  0.036057967
## MAE     MAE  0.027741448
## MPE     MPE -0.090244950
## MAPE   MAPE  0.348930402
## MASE   MASE  0.142408293
## ACF1   ACF1 -0.044919669
cat("\nAccuracy measures for ARIMA(2,2,1)(1,0,1)[12]:\n")
## 
## Accuracy measures for ARIMA(2,2,1)(1,0,1)[12]:
print(acc_model2)
##      Metric Training set
## ME       ME 0.0024544012
## RMSE   RMSE 0.0346161453
## MAE     MAE 0.0267360072
## MPE     MPE 0.0292108937
## MAPE   MAPE 0.3379801006
## MASE   MASE 0.1372469485
## ACF1   ACF1 0.0001316574
acc_forecast_arima <- accuracy(forecast_arima)
acc_original <- as.data.frame(t(acc_forecast_arima))
acc_original$Metric <- rownames(acc_original)
acc_original <- acc_original[, c("Metric", setdiff(names(acc_original), "Metric"))]
cat("Accuracy measures for the original model (forecast_cpi):\n")
## Accuracy measures for the original model (forecast_cpi):
print(acc_original)
##      Metric Training set
## ME       ME  0.003007843
## RMSE   RMSE  0.035611073
## MAE     MAE  0.027394151
## MPE     MPE  0.037443744
## MAPE   MAPE  0.345340308
## MASE   MASE  0.140625471
## ACF1   ACF1 -0.002039569
autoplot(fc_221_101) +
  labs(
    title = "Forecast from ARIMA(2,2,1)(1,0,1)[12]",
    x = "Time",
    y = "CPI"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title = element_text(size = 12)
  )

fc_221_101
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2025       6.801219 6.756034 6.846405 6.732114 6.870324
## Apr 2025       6.796221 6.722389 6.870054 6.683304 6.909139
## May 2025       6.771009 6.677480 6.864537 6.627969 6.914048
## Jun 2025       6.760349 6.651244 6.869453 6.593487 6.927210
## Jul 2025       6.752015 6.629138 6.874892 6.564091 6.939939
forecast_errors <- fc_221_101$residuals

# Create histogram and density plot
ggplot(data.frame(error = forecast_errors), aes(x = error)) +
  geom_histogram(aes(y = ..density..), bins = 20, fill = "lightblue", color = "black") +
  geom_density(color = "red", size = 1) +
  labs(title = "Histogram and Density Plot of Forecast Errors",
       x = "Errors", y = "Density") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

# MOVING AVG FORECAST
autoplot(ma3_forecast_cpi) +
  labs(title = "3-Period Moving Average Forecast - CPI",
       x = "Time", y = "CPI") +
  theme_minimal()

ma3_forecast_cpi
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2025       6.781394 6.758255 6.804532 6.746007 6.816781
## Apr 2025       6.796112 6.748471 6.843753 6.723252 6.868973
## May 2025       6.807887 6.734019 6.881755 6.694916 6.920858
## Jun 2025       6.817307 6.716704 6.917910 6.663448 6.971166
## Jul 2025       6.824843 6.697683 6.952003 6.630368 7.019318
errors_ma3 <- ma3_forecast_cpi$residuals
ggplot(data.frame(error = errors_ma3), aes(x = error)) +
  geom_histogram(aes(y = ..density..), bins = 20, fill = "lightblue", color = "black") +
  geom_density(color = "red", size = 1) +
  labs(title = "Histogram and Density of Forecast Errors (MA 3)",
       x = "Forecast Error", y = "Density") +
  theme_minimal()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

library(knitr)
acc_ma3 <- accuracy(ma3_forecast_cpi)
kable(acc_ma3[1, ], caption = "Accuracy Metrics: MA(3) Forecast")
Accuracy Metrics: MA(3) Forecast
x
ME -0.0033248
RMSE 0.0178183
MAE 0.0140117
MPE -0.0390282
MAPE 0.1756659
MASE 0.0729107
ACF1 0.3685254
accuracy(ma3_forecast_cpi)
##                        ME       RMSE        MAE         MPE      MAPE      MASE
## Training set -0.003324842 0.01781826 0.01401168 -0.03902821 0.1756659 0.0729107
##                   ACF1
## Training set 0.3685254
acc_ma3
##                        ME       RMSE        MAE         MPE      MAPE      MASE
## Training set -0.003324842 0.01781826 0.01401168 -0.03902821 0.1756659 0.0729107
##                   ACF1
## Training set 0.3685254
acc_naive <- accuracy(naive_cpi)
acc_ma3   <- accuracy(ma3_forecast_cpi)
acc_ets   <- accuracy(ets_forecast_cpi)
acc_hw    <- accuracy(hw_forecast_cpi)
acc_arima <- accuracy(fc_221_101)

# Combine the in-sample accuracy results
accuracy_table <- rbind(
  "Naive"                  = acc_naive[1, ],
  "Moving Avg 3"           = acc_ma3[1, ],
  "Exponential Smoothing" = acc_ets[1, ],
  "Holt-Winters"           = acc_hw[1, ],
  "Adj ARIMA(2,2,1)(1,0,1)[12]" = acc_arima[1, ]
)
library(knitr)
kable(accuracy_table, caption = "Forecast Accuracy Comparison")
Forecast Accuracy Comparison
ME RMSE MAE MPE MAPE MASE ACF1
Naive -0.0161399 0.0419156 0.0330104 -0.1966698 0.4131712 0.1694558 0.3002229
Moving Avg 3 -0.0033248 0.0178183 0.0140117 -0.0390282 0.1756659 0.0729107 0.3685254
Exponential Smoothing 0.0027625 0.0385093 0.0298007 0.0353249 0.3748983 0.1529791 0.2777416
Holt-Winters 0.0047187 0.0481593 0.0370507 0.0658897 0.4761771 0.1901965 0.4734754
Adj ARIMA(2,2,1)(1,0,1)[12] 0.0024544 0.0346161 0.0267360 0.0292109 0.3379801 0.1372469 0.0001317
accuracy_table
##                                       ME       RMSE        MAE         MPE
## Naive                       -0.016139896 0.04191559 0.03301036 -0.19666978
## Moving Avg 3                -0.003324842 0.01781826 0.01401168 -0.03902821
## Exponential Smoothing        0.002762472 0.03850933 0.02980067  0.03532486
## Holt-Winters                 0.004718651 0.04815929 0.03705070  0.06588973
## Adj ARIMA(2,2,1)(1,0,1)[12]  0.002454401 0.03461615 0.02673601  0.02921089
##                                  MAPE      MASE         ACF1
## Naive                       0.4131712 0.1694558 0.3002229312
## Moving Avg 3                0.1756659 0.0729107 0.3685254089
## Exponential Smoothing       0.3748983 0.1529791 0.2777415531
## Holt-Winters                0.4761771 0.1901965 0.4734753985
## Adj ARIMA(2,2,1)(1,0,1)[12] 0.3379801 0.1372469 0.0001316574